Eggcellent_Data_Analysis¶
Author: Justin Garza
Date: See below
Description:
A look at Egg Data over time
In [1]:
from datetime import datetime
from IPython.display import display
from IPython.display import Markdown as MD
# Read the clock once so the date and the version stamp always describe the
# same instant (two separate now() calls can straddle a minute or midnight).
run_started = datetime.now()
current_date = run_started.strftime('%Y-%m-%d')
version = run_started.strftime('%Y%m%d.%H%M')

# Render both stamps as bold-labelled Markdown lines in the cell output.
display(MD(f"**Date:** {current_date}"))
display(MD(f"**version:** {version}"))
Date: 2025-02-18
version: 20250218.2148
Set Up¶
In [2]:
# Import everything this notebook needs
import os
import re
import math
import numpy as np
import pandas as pd
# for the notebook rendering
from IPython.display import display, HTML
from IPython.display import Markdown as MD
# Graphs and Charts
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
# pandas Settings/Options
# Every option is spelled with its full "display." name: pandas resolves
# shortened names by pattern matching, which can become ambiguous if a future
# release adds another option containing the same text.
pd.set_option("display.max_rows", None)       # None = no row limit
pd.set_option("display.max_columns", None)    # None = no column limit
pd.set_option("display.width", 9000)          # wide enough to avoid line wrapping
pd.set_option("display.max_colwidth", 400)    # show up to 400 characters per cell
pd.set_option("display.float_format", "{:.3f}".format)  # three decimals for floats
# Continuous colormap built from seaborn's reversed "Spectral" palette
heatmapCM = sns.color_palette(palette='Spectral_r', as_cmap=True)
## directories
# Every path is derived from the directory the notebook is launched from.
DIR = os.getcwd()
print(f'{DIR=}')
DataDIR = os.path.join(DIR, 'data')   # input CSV files are read from here
OutDIR = os.path.join(DIR, 'docs')    # output folder, created on demand below

# isdir (not exists): a plain *file* called "data" must not count as the folder.
if not os.path.isdir(DataDIR):
    print('***DATA FOLDER IS MISSING***')

# exist_ok=True creates the folder when absent and is a no-op when it is
# already there, without a separate check-then-create step that can race.
os.makedirs(OutDIR, exist_ok=True)
DIR='C:\\Users\\JGarza\\GitHub\\eggcellent_data_analysis'
In [3]:
# Sankey nodes; each link below refers to a node by its position in this list
node_labels = [
    "🐔 Chicken",
    "🥚 Eggs",
    "🍳🍗 Market/Food",
    "🐣 Hatch",
    "🐔 Chickens",
]

# One (source index, target index, flow value) triple per link
sankey_flows = [
    (0, 1, 100),  # Chicken  -> Eggs
    (1, 2, 50),   # Eggs     -> Market/Food
    (1, 3, 50),   # Eggs     -> Hatch
    (3, 4, 50),   # Hatch    -> Chickens
    (4, 2, 50),   # Chickens -> Market/Food
    (4, 0, 50),   # Chickens -> Chicken
]
# Split the triples into the three parallel lists that go.Sankey expects
source, target, values = (list(column) for column in zip(*sankey_flows))

# Node and link styling, kept apart from the figure construction
sankey_nodes = dict(
    pad=15,
    thickness=20,
    line=dict(color="black", width=0.5),
    label=node_labels,
)
sankey_links = dict(source=source, target=target, value=values)

# Build the diagram, then set the title/theme and render it
fig = go.Figure(go.Sankey(node=sankey_nodes, link=sankey_links))
fig.update_layout(
    title_text="Chicken and Egg Sankey Diagram",
    font_size=12,
    template="plotly_dark",
)
fig.show()
In [4]:
# Three-letter month abbreviation -> calendar month number (1-12)
months = {
    'JAN': 1,
    'FEB': 2,
    'MAR': 3,
    'APR': 4,
    'MAY': 5,
    'JUN': 6,
    'JUL': 7,
    'AUG': 8,
    'SEP': 9,
    'OCT': 10,
    'NOV': 11,
    'DEC': 12
}

# Full English month name for each abbreviation. A word only counts as a month
# when it is a prefix of the full name, so "SEPT" and "JANUARY" match while
# "MARKETING" does not.
_MONTH_FULL_NAMES = {
    'JAN': 'JANUARY',
    'FEB': 'FEBRUARY',
    'MAR': 'MARCH',
    'APR': 'APRIL',
    'MAY': 'MAY',
    'JUN': 'JUNE',
    'JUL': 'JULY',
    'AUG': 'AUGUST',
    'SEP': 'SEPTEMBER',
    'OCT': 'OCTOBER',
    'NOV': 'NOVEMBER',
    'DEC': 'DECEMBER',
}


def getMonthNum(mon: str):
    """Return the month number (1-12) named in a period label, or None.

    The label is upper-cased and split into alphabetic words, so matching is
    case-insensitive and a month is only recognised as a whole word: 'JAN' and
    'FIRST OF DEC' resolve to 1 and 12, while a label such as 'MARKETING YEAR'
    no longer resolves to March just because it contains the letters "MAR".

    Parameters
    ----------
    mon : str
        Period label, e.g. 'JAN' or 'FIRST OF DEC'.

    Returns
    -------
    int or None
        The month number. When several months appear in the label, the earliest
        one in calendar order wins. None is returned when no month word is
        present or when `mon` is not a string (for example a missing value
        read by pandas as NaN).
    """
    if not isinstance(mon, str):
        return None

    words = re.findall(r'[A-Z]+', mon.upper())
    # Iterate the months in calendar order, so the first calendar month present wins.
    for abbr, number in months.items():
        full_name = _MONTH_FULL_NAMES.get(abbr, abbr)
        # A word matches when it is at least the abbreviation and no longer
        # than (a prefix of) the full month name.
        if any(len(word) >= len(abbr) and full_name.startswith(word) for word in words):
            return number
    return None
# Quick check of the month parser on a few representative period labels
for sample_period in ('JAN', 'JUN', 'FIRST OF DEC'):
    print(getMonthNum(sample_period))
1 6 12
Sources¶
- USDA NASS Quick Stats
- for egg layers (EggLayers.csv) and egg prices (DollarsPerDozen.csv)
- Macrotrends
- for US population (USPop.csv)
In [5]:
# EggLayers
# Built as one method chain: every .assign() works on a fresh copy, so no
# column is ever written onto a slice of another frame (the pattern that
# triggers pandas' SettingWithCopyWarning).
el = (
    pd.read_csv(os.path.join(DataDIR, 'EggLayers.csv'))
    .loc[:, ['Year', 'Period', 'Value']]
    .assign(
        # Period label -> month number; each row is dated to the 1st of that month
        Month=lambda frame: frame['Period'].apply(getMonthNum),
        Day=1,
    )
    .assign(date=lambda frame: pd.to_datetime(frame[['Year', 'Month', 'Day']]))
    .loc[:, ['date', 'Value']]
    .assign(
        # Value is read as text with thousands separators: strip the commas
        # (a literal replacement, no regex needed), convert to numbers, then
        # divide by 1e9 so the column is expressed in billions.
        Value=lambda frame: pd.to_numeric(
            frame['Value'].str.replace(',', '', regex=False)
        ) / 1_000_000_000
    )
    .sort_values(by='date', ascending=True)
)

# Preview the oldest and the newest rows
display(el.head(5))
display(el.tail(5))
| date | Value | |
|---|---|---|
| 197 | 2008-01-01 | 0.346 |
| 196 | 2008-02-01 | 0.344 |
| 200 | 2008-03-01 | 0.343 |
| 193 | 2008-04-01 | 0.342 |
| 201 | 2008-05-01 | 0.341 |
| date | Value | |
|---|---|---|
| 12 | 2024-09-01 | 0.372 |
| 11 | 2024-10-01 | 0.377 |
| 10 | 2024-11-01 | 0.376 |
| 3 | 2024-12-01 | 0.376 |
| 0 | 2025-01-01 | 0.369 |
In [6]:
# Dollars per Dozen
# Load the price series, date each row to the 1st of its period's month,
# keep only the date and the value, and order the rows chronologically.
dd = (
    pd.read_csv(os.path.join(DataDIR, 'DollarsPerDozen.csv'))
    .assign(
        Month=lambda frame: frame.Period.apply(getMonthNum),
        Day=1,
    )
    .assign(date=lambda frame: pd.to_datetime(frame[['Year', 'Month', 'Day']]))
    .loc[:, ['date', 'Value']]
    .sort_values(by='date', ascending=True)
)

# Preview the oldest and the newest rows
display(dd.head(5))
display(dd.tail(5))
| date | Value | |
|---|---|---|
| 293 | 2000-12-01 | 0.674 |
| 285 | 2001-01-01 | 0.507 |
| 284 | 2001-02-01 | 0.501 |
| 288 | 2001-03-01 | 0.517 |
| 281 | 2001-04-01 | 0.484 |
| date | Value | |
|---|---|---|
| 1 | 2024-08-01 | 3.300 |
| 11 | 2024-09-01 | 1.960 |
| 10 | 2024-10-01 | 2.720 |
| 9 | 2024-11-01 | 3.220 |
| 2 | 2024-12-01 | 4.610 |
In [7]:
# US Population
# Built as one method chain: every .assign() works on a fresh copy, so no
# column is ever written onto a slice of another frame (the pattern that
# triggers pandas' SettingWithCopyWarning).
up = (
    pd.read_csv(os.path.join(DataDIR, 'USPop.csv'))
    # The file only carries a Year, so every row is dated to 1 January
    .assign(Month=1, Day=1)
    .assign(date=lambda frame: pd.to_datetime(frame[['Year', 'Month', 'Day']]))
    .loc[:, ['date', 'Population', 'Growth Rate']]
    .assign(
        # Population is read as text with thousands separators: strip the
        # commas (a literal replacement, no regex needed), convert to numbers,
        # then divide by 1e9 so the column is expressed in billions.
        Population=lambda frame: pd.to_numeric(
            frame['Population'].str.replace(',', '', regex=False)
        ) / 1_000_000_000
    )
    .sort_values(by='date', ascending=True)
)

# Preview the oldest and the newest rows
display(up.head(5))
display(up.tail(5))
| date | Population | Growth Rate | |
|---|---|---|---|
| 17 | 2008-01-01 | 0.306 | 0.97% |
| 16 | 2009-01-01 | 0.309 | 0.92% |
| 15 | 2010-01-01 | 0.311 | 0.87% |
| 14 | 2011-01-01 | 0.314 | 0.87% |
| 13 | 2012-01-01 | 0.317 | 0.88% |
| date | Population | Growth Rate | |
|---|---|---|---|
| 4 | 2021-01-01 | 0.337 | 0.31% |
| 3 | 2022-01-01 | 0.338 | 0.38% |
| 2 | 2023-01-01 | 0.340 | 0.50% |
| 1 | 2024-01-01 | 0.342 | 0.53% |
| 0 | 2025-01-01 | 0.344 | 0.52% |
In [8]:
# Create Plotly figure: three series on one date axis, each with its own y-axis
fig = go.Figure()

# First trace (primary y-axis, left side)
fig.add_trace(go.Scatter(
    x=el['date'],
    y=el['Value'],
    mode='lines',
    name='EggLayers',
    yaxis="y"
))

# Second trace (second y-axis)
fig.add_trace(go.Scatter(
    x=dd['date'],
    y=dd['Value'],
    mode='lines',
    name='Dollars Per Dozen',
    yaxis="y2"
))

# Third trace (third y-axis)
fig.add_trace(go.Scatter(
    x=up['date'],
    y=up['Population'],
    mode='lines',
    name='US Pop.',
    yaxis="y3"
))

# Layout configuration
fig.update_layout(
    title="EggLayers x Dollars Per Dozen x US Population",
    # Stop the plot area at 90% of the width so the third axis has room
    xaxis=dict(title="Date", domain=[0, 0.9]),
    # First Y-axis (left side)
    yaxis=dict(title="EggLayers"),
    # Second Y-axis (right edge of the plot area)
    yaxis2=dict(title="Dollars Per Dozen",
                overlaying="y", side="right", anchor="x"),
    # Third Y-axis (right side, shifted outwards). `position` only takes
    # effect on a free axis; anchored to "x" it would be drawn on top of the
    # second axis.
    yaxis3=dict(title="US Pop.",
                overlaying="y", side="right", anchor="free", position=1),
    # Put the legend above the plot so it does not cover the shifted axis
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="left", x=0),
    template="plotly_dark",
    height=750,
)

# Show the plot
fig.show()
Here are the major instances when chickens were culled in the United States to prevent the spread of bird flu (avian influenza), along with specific time frames:
Major Bird Flu Culling Events in the U.S.¶
- 2014–2015 (December 2014 – June 2015) – One of the largest outbreaks in U.S. history, involving H5N2 and H5N8, resulted in the culling of over 50 million birds, particularly in Iowa and Minnesota.
- 2022 (February–December) – A highly pathogenic H5N1 strain spread rapidly, causing the culling of over 50 million birds across multiple states, including Iowa, Nebraska, and Colorado.
- 2023 (January–December) – The outbreak continued into 2023, with additional cullings due to the persistent spread of H5N1.
- 2024 (Ongoing) – Outbreaks of bird flu have continued in poultry farms, leading to periodic cullings of infected flocks to prevent further spread.